03. Testing your models

Testing

Start Quiz:

# Import statements 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Import the train test split
# http://scikit-learn.org/0.16/modules/generated/sklearn.cross_validation.train_test_split.html


# Read in the data.
data = np.asarray(pd.read_csv('data.csv', header=None))
# Assign the features to the variable X, and the labels to the variable y. 
X = data[:,0:2]
y = data[:,2]

# Use train test split to split your data 
# Use a test size of 25% and a random state of 42
X_train, X_test, y_train, y_test = 

# Instantiate your decision tree model
model = None

# TODO: Fit the model to the training data.

# TODO: Make predictions on the test data
y_pred = None

# TODO: Calculate the accuracy and assign it to the variable acc on the test data.
acc = None
0.24539,0.81725,0
0.21774,0.76462,0
0.20161,0.69737,0
0.20161,0.58041,0
0.2477,0.49561,0
0.32834,0.44883,0
0.39516,0.48099,0
0.39286,0.57164,0
0.33525,0.62135,0
0.33986,0.71199,0
0.34447,0.81433,0
0.28226,0.82602,0
0.26613,0.75,0
0.26613,0.63596,0
0.32604,0.54825,0
0.28917,0.65643,0
0.80069,0.71491,0
0.80069,0.64181,0
0.80069,0.50146,0
0.79839,0.36988,0
0.73157,0.25,0
0.63249,0.18275,0
0.60023,0.27047,0
0.66014,0.34649,0
0.70161,0.42251,0
0.70853,0.53947,0
0.71544,0.63304,0
0.74309,0.72076,0
0.75,0.63596,0
0.75,0.46345,0
0.72235,0.35526,0
0.66935,0.28509,0
0.20622,0.94298,1
0.26613,0.8962,1
0.38134,0.8962,1
0.42051,0.94591,1
0.49885,0.86404,1
0.31452,0.93421,1
0.53111,0.72076,1
0.45276,0.74415,1
0.53571,0.6038,1
0.60484,0.71491,1
0.60945,0.58333,1
0.51267,0.47807,1
0.50806,0.59211,1
0.46198,0.30556,1
0.5288,0.41082,1
0.38594,0.35819,1
0.31682,0.31433,1
0.29608,0.20906,1
0.36982,0.27632,1
0.42972,0.18275,1
0.51498,0.10965,1
0.53111,0.20906,1
0.59793,0.095029,1
0.73848,0.086257,1
0.83065,0.18275,1
0.8629,0.10965,1
0.88364,0.27924,1
0.93433,0.30848,1
0.93433,0.19444,1
0.92512,0.43421,1
0.87903,0.43421,1
0.87903,0.58626,1
0.9182,0.71491,1
0.85138,0.8348,1
0.85599,0.94006,1
0.70853,0.94298,1
0.70853,0.87281,1
0.59793,0.93129,1
0.61175,0.83187,1
0.78226,0.82895,1
0.78917,0.8962,1
0.90668,0.89912,1
0.14862,0.92251,1
0.15092,0.85819,1
0.097926,0.85819,1
0.079493,0.91374,1
0.079493,0.77632,1
0.10945,0.79678,1
0.12327,0.67982,1
0.077189,0.6886,1
0.081797,0.58626,1
0.14862,0.58041,1
0.14862,0.5307,1
0.14171,0.41959,1
0.08871,0.49269,1
0.095622,0.36696,1
0.24539,0.3962,1
0.1947,0.29678,1
0.16935,0.22368,1
0.15553,0.13596,1
0.23848,0.12427,1
0.33065,0.12427,1
0.095622,0.2617,1
0.091014,0.20322,1
# Import statements 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Import the train test split
# http://scikit-learn.org/0.16/modules/generated/sklearn.cross_validation.train_test_split.html
from sklearn.cross_validation import train_test_split

# Read the data.
data = np.asarray(pd.read_csv('data.csv', header=None))
# Assign the features to the variable X, and the labels to the variable y. 
X = data[:,0:2]
y = data[:,2]

# Use train test split to split your data 
# Use a test size of 25% and a random state of 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# TODO: Create the decision tree model and assign it to the variable model.
model = DecisionTreeClassifier()

# TODO: Fit the model to the training data.
model.fit(X_train,y_train)

# TODO: Make predictions on the test data
y_pred = model.predict(X_test)

# TODO: Calculate the accuracy and assign it to the variable acc. on the test data
acc = accuracy_score(y_test, y_pred)